/*
PREPARE DATA
*/
use [Raw World Poll data is not available to the public]

* WP4 is treated as a Stata daily date below (year() and month() are applied to it).
rename WP4 time
* time_string is a numeric copy of the date that is only displayed in %td format.
generate time_string = time
format time_string %td

* Calendar components, a "month-year" string key, and that key parsed back to a date.
generate year = year(time)
generate month = month(time)
egen yrm = concat(month year), punc("-")
generate time_month = date(yrm, "MY")

*Naming conventions
* Attach the country naming lookup by WP5; rows found only in the lookup are not kept
* and no _merge indicator is left behind.
merge m:1 WP5 using "[]\country_naming_conventions.dta", keep(master match) nogenerate


**Process data**
* Print the value labels stored under each item's name so the codes can be inspected.
local covid_items WP21757 WP21758 WP21759 WP21760 WP21761 WP21767 WP21768
foreach item of local covid_items {
    label list `item'
}

* Items recoded to 0/1 indicators below.
local yesno_items WP21758 WP21759 WP21760 WP21761 WP21768

* YES_<item>: 1 when the item equals 1, 0 when it equals 2, missing for anything else.
foreach item of local yesno_items {
    generate YES_`item' = cond(`item' == 1, 1, cond(`item' == 2, 0, .))
}

* YES_<item>_rebase: same as above, except that code 3 is also counted as 0
* (presumably a "does not apply" answer; confirm against the label listing).
foreach item of local yesno_items {
    generate YES_`item'_rebase = cond(`item' == 1, 1, cond(inlist(`item', 2, 3), 0, .))
}


* affected_alot: 1 when WP21757 equals 1, 0 when it equals 2 or 3, missing otherwise.
generate affected_alot = cond(WP21757 == 1, 1, cond(inlist(WP21757, 2, 3), 0, .))

* Give the recoded items readable names.
rename (YES_WP21758 YES_WP21759 YES_WP21760 YES_WP21761 YES_WP21768) ///
       (temp_layoff lost_job    lost_hours  lost_income vaccine)

rename (YES_WP21758_rebase YES_WP21759_rebase YES_WP21760_rebase YES_WP21761_rebase YES_WP21768_rebase) ///
       (temp_layoff_rebase lost_job_rebase    lost_hours_rebase  lost_income_rebase vaccine_rebase)

* layoff: 1 if either a temporary layoff or a job loss is reported,
* 0 only when both are reported as 0, missing otherwise.
generate layoff = cond(temp_layoff == 1 | lost_job == 1, 1, cond(temp_layoff == 0 & lost_job == 0, 0, .))

* Standardise each harm item (SD<name>) and remember the new names for the row mean.
local sd_items
foreach item in temp_layoff lost_job lost_hours lost_income affected_alot {
    egen SD`item' = std(`item')
    local sd_items `sd_items' SD`item'
}

* harm_index: row mean of the standardised items (rowmean skips missing components).
egen harm_index = rowmean(`sd_items')
summarize harm_index

**Factor analysis, define PCA-based harm index
* The same five standardised harm items feed every command in this section.
local harm_sd SDtemp_layoff SDlost_job SDlost_hours SDlost_income SDaffected_alot

* Weighted PCA on the covariance matrix; harm_pca holds the component scores.
pca `harm_sd' [aw=WGT], covariance
predict harm_pca, score

* Weighted factor analysis keeping factors with eigenvalue of at least 1; harm_factor holds the predicted scores.
factor `harm_sd' [aw=WGT], mineigen(1)
predict harm_factor

* Scale reliability with item-level detail (unweighted).
alpha `harm_sd', item

* NOTE(review): covid_econ is not created anywhere in the visible code - confirm it exists in the data at this point.
correlate harm_index harm_factor covid_econ [aw=WGT]

* Education indicators from WP3117: each is 1 for the matching code and 0 for
* every other value, including missing.
label list WP3117
generate elementary = (WP3117 == 1)
generate secondary = (WP3117 == 2)
generate tertiary = (WP3117 == 3)

* Age (WP1220) with its square and cube, plus age bands cut at the listed lower bounds.
generate age = WP1220
generate age2 = WP1220^2
generate age3 = WP1220^3
egen ageg = cut(age), at(15,20,30,40,50,65,80,101)

* One dummy per age band (ageg1, ageg2, ...), labelled in band order.
* The labels assume all seven bands are present in the data.
tabulate ageg, generate(ageg)
local band = 0
foreach band_label in "age 15-19" "age 20-29" "age 30-39" "age 40-49" "age 50-64" "age 65-79" "age 80+" {
    local ++band
    label variable ageg`band' "`band_label'"
}

* One dummy per category of INCOME_5 (incQ1, incQ2, ...).
tabulate INCOME_5, generate(incQ)

* male: 1 when WP1219 equals 1, 0 when it equals 2, missing otherwise.
generate male = cond(WP1219 == 1, 1, cond(WP1219 == 2, 0, .))

* One dummy per month-year value (yrm1, yrm2, ...).
tabulate yrm, generate(yrm)

* female: mirror image of male.
generate female = cond(WP1219 == 2, 1, cond(WP1219 == 1, 0, .))

* foreign: 1 when WP4657 equals 2, 0 when it equals 1, missing otherwise.
generate foreign = cond(WP4657 == 2, 1, cond(WP4657 == 1, 0, .))
label variable foreign "born outside of country"

* out_labor: 1 for EMP_2010 code 6, 0 for codes up to 5, missing otherwise.
generate out_labor = cond(EMP_2010 == 6, 1, cond(EMP_2010 <= 5, 0, .))
label variable out_labor "not in labor force"

* not_working: 1 for EMP_2010 codes 4 and 6, 0 for codes below 4 and for code 5, missing otherwise.
generate not_working = cond(inlist(EMP_2010, 4, 6), 1, cond(EMP_2010 < 4 | EMP_2010 == 5, 0, .))
label variable not_working "out of labor for or unemployed"

* Employment-status indicators derived from EMP_2010.
* Stata evaluates "EMP_2010 != k" as true when EMP_2010 is missing, so a plain
* "replace ... = 0 if EMP_2010 != k" (or "gen ... = 1 if EMP_2010 != 6") would assign
* a definite status to respondents whose employment status is unknown. Every
* indicator here is therefore left missing when EMP_2010 is missing, matching how
* out_labor and employed already behave.

* 1 for EMP_2010 code 1, 0 for any other non-missing code.
generate employed_FT = (EMP_2010 == 1) if !missing(EMP_2010)

* 1 for EMP_2010 code 2, 0 for any other non-missing code.
generate self_employed_FT = (EMP_2010 == 2) if !missing(EMP_2010)

* 1 for EMP_2010 code 4, 0 for any other non-missing code.
generate unemployed = (EMP_2010 == 4) if !missing(EMP_2010)

* 1 for codes up to 3 and for code 5, 0 for codes 4 and 6, missing otherwise.
generate employed = cond(EMP_2010 <= 3 | EMP_2010 == 5, 1, cond(inlist(EMP_2010, 4, 6), 0, .))

* 1 for every non-missing code except 6, 0 for code 6 (the complement of out_labor).
generate lab_force = (EMP_2010 != 6) if !missing(EMP_2010)

* have_children: 1 when WP1230 is between 1 and 97, 0 when it is 0 or 98 and above.
* Missing WP1230 also satisfies ">= 98" in Stata and is therefore coded 0.
generate have_children = cond(WP1230 > 0 & WP1230 < 98, 1, cond(WP1230 == 0 | WP1230 >= 98, 0, .))
label variable have_children "live with children under age 15"

* mother: 1 when both have_children and female are 1, 0 when either one is 0, missing otherwise.
generate mother = cond(have_children == 1 & female == 1, 1, cond(have_children == 0 | female == 0, 0, .))
label variable mother "Female living with children under 15"

* Place-of-residence indicators from WP14. Each indicator is 1 for its own code,
* missing when WP14 is 4 or 5, and 0 for every other value (including missing WP14).
* NOTE(review): codes 4 and 5 are presumably non-substantive answers - confirm against the label listing.
label list WP14
local places city suburb rural town
local codes 3 6 1 2
forvalues k = 1/4 {
    local place : word `k' of `places'
    local code : word `k' of `codes'
    generate `place' = cond(WP14 == `code', 1, cond(inlist(WP14, 4, 5), ., 0))
}

* married: 1 when WP1223 equals 2, 0 for every other value (including missing).
generate married = (WP1223 == 2)
label variable married "married or with domestic partner"

* not_enough_food: 1 when WP40 equals 1, 0 when it equals 2, missing otherwise.
generate not_enough_food = cond(WP40 == 1, 1, cond(WP40 == 2, 0, .))
label variable not_enough_food "Not enough money for food in last 12 months"

* life_eval: WP16 with values above 10 (and missing) set to missing.
generate life_eval = WP16 if WP16 <= 10

* living_better / living_worse: indicators for WP31 codes 1 and 3 respectively,
* 0 for the other two codes among 1-3, missing otherwise.
generate living_better = cond(WP31 == 1, 1, cond(inlist(WP31, 2, 3), 0, .))

generate living_worse = cond(WP31 == 3, 1, cond(inlist(WP31, 1, 2), 0, .))

*Worry, stress, anger
rename (WP69 WP71 WP74) (worry stress anger)
tabulate worry
* Y<name>: 1 when the item equals 1, 0 when it equals 2, missing otherwise.
foreach feeling in worry stress anger {
    generate Y`feeling' = cond(`feeling' == 1, 1, cond(`feeling' == 2, 0, .))
}

* low_life: 1 when life_eval is below 7, 0 when it is 7 or above, missing when life_eval is missing.
generate low_life = (life_eval < 7) if !missing(life_eval)

* Variable groups stored as globals.
* NOTE(review): presumably reused by later analysis do-files - confirm.
global harm "unemployed harm_index  lost_job lost_income affected_alot temp_layoff lost_hours" 
global wellbeing "living_worse not_enough_food Yworry low_life life_eval"  
global controls "out_labor city suburb town foreign elementary secondary ageg1 ageg2 ageg3 ageg4 ageg5 ageg6   married have_children mother female  incQ1 incQ2 incQ3 incQ4"

* Attach descriptions to the two composite measures. "label variable" is the
* command that sets a variable's description; "label list" only displays value
* labels and does not accept a description string.
* NOTE(review): covid_econ is not created in the visible code - confirm it exists in the data.
label variable covid_econ "Job loss, income loss, and loss of hours"
label variable harm_factor "First principal components of factor analysis of harm items"

* Keep only the analysis variables plus identifiers, dates, weight and the raw
* items listed below, then write the cleaned file.
keep $harm $wellbeing $controls  WP5 yrm month year time time_string time_month countrycode WGT ///
WP14 WP1223 WP1230 EMP_2010 WP1219 WP4657 WP1220 Ystress Yanger employed lab_force WP21757 covid_econ harm_factor layoff

save "HSSC replication data\Gallup data\cleaned_worldpoll_microdata.dta", replace
